pacman::p_load(dplyr,
               tidyr,
               stargazer,
               dotwhisker,
               lfe,
               scales,
               broom,
               kableExtra,
               modelsummary,
               csvy,
               ltm)

# TABLE A1 ----

# Read the cleaned congressional data and discard rows with a missing year.
congress <- read.csv("data/clean/congress.csv") %>%
  filter(!is.na(year))

# Derived columns used below:
#   type         - ordered district classification by racial majority
#   black        - NA recoded to 0, other values kept as they are
#   majority_dem - 1 when dem_vote_share is above 0.5, otherwise 0
congress <- congress %>%
  mutate(
    type = case_when(
      pct_white > 0.5 ~ "Majority-white districts",
      pct_black > 0.5 ~ "Majority-Black districts",
      TRUE ~ "Other majority-minority districts"
    ),
    type = factor(
      type,
      levels = c(
        "Majority-Black districts",
        "Other majority-minority districts",
        "Majority-white districts"
      ),
      ordered = TRUE
    ),
    black = case_when(is.na(black) ~ 0, TRUE ~ black),
    majority_dem = case_when(dem_vote_share > 0.5 ~ 1, TRUE ~ 0)
  )

# Extra row naming the sample behind each column; the "position" attribute
# is read by modelsummary to decide where the row is inserted.
rows <- tribble(
  ~term,       ~"(1)", ~"(2)", ~"(3)",              ~"(4)",
  "Districts", "All",  "All",  "Majority-Democrat", "Majority-Democrat"
)
attr(rows, "position") <- 34

# Models (1)-(2) use all districts, models (3)-(4) only rows flagged
# majority_dem. Odd-numbered models interact pct_white with yearnum,
# even-numbered models with factor(year).
taba1_models <- list(
  lm(black ~ pct_white * yearnum, data = congress),
  lm(black ~ pct_white * factor(year), data = congress),
  lm(black ~ pct_white * yearnum,
     data = congress[congress$majority_dem == 1, ]),
  lm(black ~ pct_white * factor(year),
     data = congress[congress$majority_dem == 1, ])
)

taba1 <- modelsummary(
  models = taba1_models,
  stars = TRUE,
  escape = FALSE,
  add_rows = rows,
  gof_map = c("nobs", "r.squared"),
  title = "\\label{tab:congtab}Estimated proportion Black MCs as a function of district \\% white and no. Congress",
  # coef_map both relabels the coefficients and fixes their order in the table.
  coef_map = c(
    "pct_white" = "District \\% white",
    "yearnum" = "No. Congress",
    "pct_white:yearnum" = "District \\% white $\\times$ no. Congress",
    "factor(year)2010" = "112th Congress",
    "factor(year)2012" = "113th Congress",
    "factor(year)2014" = "114th Congress",
    "factor(year)2016" = "115th Congress",
    "factor(year)2018" = "116th Congress",
    "factor(year)2020" = "117th Congress",
    "factor(year)2022" = "118th Congress",
    "pct_white:factor(year)2010" = "District \\% white  $\\times$ 112th Congress",
    "pct_white:factor(year)2012" = "District \\% white  $\\times$ 113th Congress",
    "pct_white:factor(year)2014" = "District \\% white  $\\times$ 114th Congress",
    "pct_white:factor(year)2016" = "District \\% white  $\\times$ 115th Congress",
    "pct_white:factor(year)2018" = "District \\% white  $\\times$ 116th Congress",
    "pct_white:factor(year)2020" = "District \\% white  $\\times$ 117th Congress",
    "pct_white:factor(year)2022" = "District \\% white  $\\times$ 118th Congress",
    "(Intercept)" = "Intercept"
  ),
  output = "kableExtra",
  format = "latex"
) %>%
  add_header_above(
    c(" " = 1, "DV: Estimated proportion Black members of Congress" = 4)
  ) %>%
  footnote(
    "This table presents the results of ordinary least squares regression models. District % white is scaled from 0 to 1. No. Congress is scaled from 0 (111th Congress, elected 2008) to 7 (118th Congress, elected 2022). The reference category in Models (2) and (4) is the 111th Congress. Majority-Democratic districts are defined as those in which a majority of voters supported the Democratic nominee in the most recent presidential election.",
    threeparttable = TRUE
  ) %>%
  # Row ranges are hard-coded to the layout produced by coef_map above.
  pack_rows("No. Congress", 7, 18) %>%
  pack_rows("District \\\\% white $\\\\times$ no. Congress", 19, 29,
            escape = FALSE) %>%
  kable_styling(font_size = 9) %>%
  column_spec(2:5, width = "2cm")

save_kable(taba1, "results/taba1.tex", float = FALSE)



# TABLE A2 ----

# Candidate-level data restricted to Democratic candidates in districts whose
# non-Hispanic white share exceeds 50 (measured before rescaling).
dat <- read.csv("data/clean/SorensenChenPRQ_cleaned.csv") %>%
  filter(whitepct_nohisp > 50 & party == "Democratic") %>%
  mutate(
    # "None" is relabelled "1"; presumably so that it sorts first and becomes
    # the reference level of experience -- TODO confirm.
    experience = case_when(experience == "None" ~ "1",
                           TRUE ~ experience),
    # Continuous covariates are rescaled to the unit interval.
    HHincome = rescale(HHincome, to = c(0, 1)),
    whitepct_nohisp = rescale(whitepct_nohisp, to = c(0, 1)),
    seniority = rescale(seniority, to = c(0, 1)),
    # Money amounts are divided by 1,000. These are ordinary vectorised
    # divisions, so the former rowwise() step was unnecessary and is dropped.
    receipts = receipts / 1000,
    indivs = indivs / 1000,
    pacsandothercomittees = pacsandothercomittees / 1000
  )

# `dat` already contains Democratic candidates only (see the filter above),
# so the models are fitted on it directly.
mod_full <- felm(pacsandothercomittees ~ factor(cycle) + # main effect of cycle
                   factor(cycle):black + # marginal effect of Black vs. white by cycle
                   gender + # gender
                   cook_folded + # safe district
                   status + # challenger/incumbent/open
                   educpct + # district % college educated
                   HHincome + # household income
                   whitepct_nohisp + # district % non-Hispanic white
                   seniority + # seniority
                   leadership + # leadership
                   comchair + # committee chair
                   experience # held previous office
                 | 0 | 0 | id, # no fixed effects, no IV; SEs clustered on id
                 dat)


mod_biv <- felm(pacsandothercomittees ~ factor(cycle) + # main effect of cycle
                  factor(cycle):black # marginal effect of Black vs. white by cycle
                | 0 | 0 | id, # no fixed effects, no IV; SEs clustered on id
                dat)

# stargazer() returns the LaTeX source as a character vector, which is what
# gets written to disk below.
# NOTE(review): knitr::opts_current$get("label") is presumably NULL when this
# script runs outside a knitr chunk, which would leave the label as "tab:".
taba2 <- stargazer(mod_biv, mod_full, type = "latex",
          header = FALSE,
          covariate.labels = c("2012", "2014", "2016", "2018", "2020", "2022",
                               "Female", "Folded Cook index", "Incumbent", "Open seat",
                               "District pct. college", "District HH income", "District pct. non-Hispanic white", "Seniority", "Leadership", "Committee chair",
                               "Prior elected office", "2010 | Black", "2012 | Black", "2014 | Black", "2016 | Black", "2018 | Black", "2020 | Black", "2022 | Black"),
          dep.var.labels = c("Receipts from PACs and committees"),
          title = "PAC and other committee receipts from Black and white Democratic frontrunners in majority-white congressional districts, 2010-2022.",
          font.size = "small",
          single.row = TRUE,
          omit.stat = c("f", "ser"),
          column.sep.width = "-3pt",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          label = paste0("tab:", knitr::opts_current$get("label")),
          notes = c("\\parbox[t]{10cm}{Data are from the Federal Election Commission, Daily Kos, Ballotpedia, Project VoteSmart, the 2019 ACS, and candidates' and MCs' professional websites. Omitted category is white male challengers to incumbents without prior elected experience. Outcome is scaled in thousands of dollars. All independent variables scaled 0-1.}"),
          notes.align = "l")

# Fix: the original wrote `taba1` here, so results/taba2.tex received the
# Table A1 output and Table A2 was never saved.
writeLines(taba2, "results/taba2.tex")

# FIGURE B1 ----
by_project_out <- read.csv(
  "data/clean/MMs_wvb_respracerespparty_main3_weighted.csv"
)

# Dot-and-whisker plot of the per-study estimates, one panel per value of
# `type`. `vars_order` fixes the order in which the studies are listed.
dwplot(
  by_project_out,
  vars_order = c(
    "1988 LA Times", "1989 LA Times",
    "2004 KN",
    "2012 GfK", "2012 MTurk",
    "2013 GFK",
    "2015 YouGov",
    "2016 Facebook", "2016 MTurk", "2016 MTurk 2", "2016 Qualtrics",
    "2016 Research Now/SSI", "2016 SSI", "2016 SSI 2", "2016 SSI 3",
    "2016 YouGov", "2016 YouGov 2", "2016 YouGov 3",
    "2017 MTurk",
    "2018 Cint", "2018 Lucid", "2018 Lucid 2", "2018 YouGov",
    "2019 Fuqua Behavioral Lab", "2019 MTurk", "2019 Qualtrics",
    "2019 YouGov", "2019 YouGov 2", "2019 YouGov 3",
    "2020 Lucid", "2020 MTurk", "2020 YouGov", "2020 YouGov 2",
    "2020 YouGov 3",
    "2021 Lucid",
    "2022 Lucid", "2022 Lucid 2",
    "2023 Lucid", "2023 Lucid 2", "2023 Lucid 3",
    "2023 CA voter file", "2023 Lucid 4"
  ),
  # Dashed reference line at a marginal mean of 0.5.
  vline = geom_vline(
    xintercept = 0.5,
    color = "darkgrey",
    linetype = "dashed"
  )
) +
  theme_bw() +
  theme(
    legend.position = "none",
    text = element_text(family = "serif"),
    panel.grid = element_blank(),
    strip.placement = "outside",
    strip.background = element_rect(fill = "white"),
    strip.text.y = element_text(face = "bold")
  ) +
  facet_wrap(~type) +
  scale_color_manual(values = c("black", "black")) +
  xlab("Marginal means for Black candidates with white opponents") +
  # Outlined boxes with a fully transparent fill (alpha = 0), each drawn only
  # in the facet named by its one-row data frame.
  geom_rect(
    data = data.frame(type = "White Democratic respondents"),
    aes(xmin = 0.42, xmax = 0.72, ymin = 0.6, ymax = 6.2),
    color = "darkgrey", alpha = 0
  ) +
  geom_rect(
    data = data.frame(type = "Black Democratic respondents"),
    aes(xmin = 0.49, xmax = 0.79, ymin = 0.7, ymax = 2.5),
    color = "darkgrey", alpha = 0
  ) +
  geom_rect(
    data = data.frame(type = "White Republican respondents"),
    aes(xmin = 0.41, xmax = 0.60, ymin = 1.7, ymax = 2.5),
    color = "darkgrey", alpha = 0
  ) +
  # Rotated caption placed next to the box in the white-Democrat panel.
  geom_text(
    data = data.frame(type = "White Democratic respondents"),
    x = 0.4, y = 0.6,
    label = "Original studies",
    angle = 90, family = "serif", hjust = 0, color = "grey30", size = 3
  )

# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(
  filename = "figb1.pdf",
  path = "results",
  units = "in",
  width = 6.5,
  height = 9,
  dpi = 700
)


# TABLE B2 ----

# Print missing cells as empty strings in every kable produced from here on.
options(knitr.kable.NA = "")

# Pre-computed pooled marginal means; the X column is not part of the table.
mmtab <- read.csv("data/clean/appx_conjoint_mms.csv")
mmtab <- dplyr::select(mmtab, -X)

# Long LaTeX table with the rows grouped by conjoint attribute. The row ranges
# are hard-coded to the row order of `mmtab`; the last group runs to its end.
tabb1 <- knitr::kable(
  mmtab,
  format = "latex",
  booktabs = TRUE,
  longtable = TRUE,
  full_width = TRUE,
  caption = "Pooled marginal means from original conjoint studies.",
  col.names = c("Attribute", "Marginal mean", "Black MM - white MM"),
  digits = 3,
  escape = FALSE
) %>%
  pack_rows("Race (all studies)", 1, 2) %>%
  pack_rows("Gender (Lucid 4 and 5 and CA voter study)", 3, 4) %>%
  pack_rows("Age (all studies)", 5, 9) %>%
  pack_rows("Occupation (all studies)", 10, 15) %>%
  pack_rows("Political experience (all studies)", 16, 20) %>%
  pack_rows("Endorsement (Lucid 1, 2, 3, 4 and CA voter study)", 21, 24) %>%
  pack_rows("Endorsement (Lucid 5)", 25, 28) %>%
  pack_rows("Publicly funded healthcare (Lucid 1, 1/3 Lucid 2, Lucid 4)",
            29, 31) %>%
  pack_rows("Fossil fuels (Lucid 1, 1/3 Lucid 2, Lucid 4)", 32, 34) %>%
  pack_rows("Reparations (Lucid 1, 1/3 Lucid 2, Lucid 4)", 35, 36) %>%
  pack_rows("Candidate self-placement (2/3 Lucid 2, Lucid 3, CA voter study)",
            37, 41) %>%
  pack_rows("Priority if elected (Lucid 5)", 42, nrow(mmtab)) %>%
  kable_styling(
    font_size = 9,
    latex_options = c("hold_position", "repeat_header")
  ) %>%
  # escape = FALSE keeps the $...$ math in the note intact.
  footnote(
    general = "$*p<0.05$; $**p<0.01$; $***p<0.001$. P-values for rates of support indicate significance of difference from 0.5. Estimates are unweighted.",
    escape = FALSE,
    threeparttable = TRUE
  )

save_kable(tabb1, "results/tabb2.tex", float = FALSE)

# TABLE B3 ----

# Build a table with one row per demographic category and one column per
# sample, each cell reading "<share>% (N = <count>)".
dems <- read.csv("data/clean/original_study_demographics.csv") %>%
  # Stack the five demographic variables into feature/value pairs.
  pivot_longer(
    c(age, female, educ, hhinc, region),
    names_to = "feature",
    values_to = "value"
  ) %>%
  # Denominator: non-missing answers per sample and feature.
  group_by(sample, feature) %>%
  mutate(total_n = sum(!is.na(value))) %>%
  # Numerator: rows per sample, feature and category (NA is its own category).
  group_by(sample, feature, value) %>%
  mutate(
    n = n(),
    share = round(n / total_n * 100, 1),
    res = paste0(share, "% (N = ", n, ")")
  ) %>%
  dplyr::select(sample, feature, value, res) %>%
  distinct() %>%
  # Samples become columns.
  pivot_wider(names_from = sample, values_from = res) %>%
  mutate(
    feature = case_when(
      feature == "age" ~ "age group",
      feature == "female" ~ "gender",
      feature == "educ" ~ "education",
      feature == "hhinc" ~ "household income",
      feature == "region" ~ "region"
    ),
    # Missing categories get a label built from the readable feature name.
    value = case_when(is.na(value) ~ paste("Missing", feature), TRUE ~ value),
    # The level order is the row order of the printed table.
    value = factor(
      value,
      levels = c(
        "18-29", "30-39", "40-49", "50-64", "65+", "Missing age group",
        "Female", "Male", "Missing gender",
        "Northeast", "Midwest", "South", "West", "Missing region",
        "Less than HS", "High school", "Some college", "Bachelor's degree", "Post-secondary degree", "Missing education",
        "$24,999 or less", "$25k-$54,999", "$55k-$79,999", "$80k-$149,999", "$150k or more",
        "$25k-$49,999 (CA sample)", "$50k-$74,999 (CA sample)", "$75k-$99,999 (CA sample)", "$100k-$149,999 (CA sample)", "Missing household income"
      ),
      ordered = TRUE
    )
  ) %>%
  ungroup() %>%
  dplyr::select(-feature) %>%
  arrange(value)

# Landscape LaTeX table; the row groups follow the factor-level order above.
tabb2 <- landscape(
  kable(
    dems,
    format = "latex",
    booktabs = TRUE,
    full_width = TRUE,
    caption = "Demographic characteristics by sample.",
    col.names = c("", "Lucid 1", "Lucid 2", "Lucid 3", "Lucid 4", "Lucid 5", "California voter study"),
    align = "lcccccc"
  )
) %>%
  kable_styling(font_size = 9) %>%
  column_spec(2:7, width = "2.5cm") %>%
  pack_rows("Age", 1, 6) %>%
  pack_rows("Gender", 7, 9) %>%
  pack_rows("Region", 10, 14) %>%
  pack_rows("Education", 15, 20) %>%
  pack_rows("Household income", 21, nrow(dems))

save_kable(tabb2, "results/tabb3.tex", float = FALSE)

# TABLE B4 ----

# Pre-computed table of support for Black candidates by participant
# demographics; the X column is not part of the table.
dems <- read.csv("data/clean/appx_demtab.csv") %>% dplyr::select(-X)

# Landscape LaTeX table: five marginal-means columns followed by one column of
# OLS coefficients. Row ranges are hard-coded to the row order of the input
# file; the final two rows are left outside every group.
tabb4 <- landscape(kable(dems, booktabs = TRUE,
                caption = "Demographic characteristics and support for Black candidates among white Democrats.",
                col.names = c("Participant demographics", "Age marginal means", "Gender marginal means", "Income marginal means", "Education marginal means", "Region marginal means", "Multivariate OLS regression coefficients"),
                align = "lcccccc",
                full_width = TRUE,
                format = "latex")) %>%
  kable_styling(font_size = 9) %>%
  column_spec(2:6, width = "2.3cm") %>%
  column_spec(7, width = "2.9cm") %>%
  pack_rows("Age", 1, 5) %>%
  pack_rows("Gender", 6, 7) %>%
  pack_rows("Household income", 8, 16) %>%
  pack_rows("Education", 17, 21) %>%
  # Fix: this group was labelled "Education" a second time. The rows sit
  # after the education block and the table has a region column, so they
  # are the region rows.
  pack_rows("Region", 22, nrow(dems) - 2) %>%
  # Fixes: (1) escape = FALSE, as in the other tables of this script, so the
  # $...$ significance legend is typeset as math instead of being escaped;
  # (2) the column references now match the five marginal-means columns and
  # the single OLS column declared in col.names.
  footnote(general = "$*p<0.05$; $**p<0.01$; $***p<0.001$. P-values for columns 1-5 indicate significance of difference from 0.5; p-values for column 6 indicate significance of difference from 0. Note that income was coded differently in the California data study.", threeparttable = TRUE, escape = FALSE) %>%
  add_header_above(header = c(" " = 1, "Black candidate selected" = 6))

save_kable(tabb4, "results/tabb4.tex", float = FALSE)

# TABLE B5 ----

# Table B5 uses raw data from conjoints included in the meta-analysis.

# TABLE B6 ----

# Table B6 uses raw data from conjoints included in the meta-analysis.

# TABLE C2 ----

attitudestab <- read.csv("data/clean/attitudestab.csv")

# Nine weighted OLS models of chosen_candidate. Each attitude measure appears
# once on its own and once with the control set (self-monitoring, age, region,
# gender, education, household income); the last model enters both feeling
# thermometers together with the controls.
mods <- list(
  # Racial resentment
  lm(chosen_candidate ~ rr_index,
     data = attitudestab, weights = weight),
  lm(chosen_candidate ~ rr_index + selfmon_index + age + region + factor(gender) + educ + hhinc,
     data = attitudestab, weights = weight),
  # Perceived anti-Black discrimination
  lm(chosen_candidate ~ disc_black,
     data = attitudestab, weights = weight),
  lm(chosen_candidate ~ disc_black + selfmon_index + age + region + factor(gender) + educ + hhinc,
     data = attitudestab, weights = weight),
  # Trump feeling thermometer
  lm(chosen_candidate ~ therm_trump,
     data = attitudestab, weights = weight),
  lm(chosen_candidate ~ therm_trump + selfmon_index + age + region + factor(gender) + educ + hhinc,
     data = attitudestab, weights = weight),
  # Biden feeling thermometer
  lm(chosen_candidate ~ therm_biden,
     data = attitudestab, weights = weight),
  lm(chosen_candidate ~ therm_biden + selfmon_index + age + region + factor(gender) + educ + hhinc,
     data = attitudestab, weights = weight),
  # Both thermometers
  lm(chosen_candidate ~ therm_trump + therm_biden + selfmon_index + age + region + factor(gender) + educ + hhinc,
     data = attitudestab, weights = weight)
)

tabc2 <- modelsummary(
  mods,
  stars = TRUE,
  escape = FALSE,
  estimate = "{estimate}{stars}",
  statistic = "({std.error})",
  gof_map = c("nobs", "r.squared"),
  title = "Perceptions of racial injustice, presidential feeling thermometers, and support for Black candidates among white Democratic survey participants.",
  # coef_map relabels the coefficients and sets their order in the table.
  coef_map = c(
    "disc_black" = "Anti-Black discrimination",
    "rr_index" = "Racial resentment",
    "therm_trump" = "Trump FT",
    "therm_biden" = "Biden FT",
    "selfmon_index" = "Self-monitoring",
    "age" = "Age",
    "regionnortheast" = "Northeast",
    "regionsouth" = "South",
    "regionwest" = "West",
    "factor(gender)2" = "Female",
    "educ" = "Education",
    "hhinc" = "Household income",
    "(Intercept)" = "Intercept"
  ),
  output = "kableExtra",
  format = "latex"
) %>%
  add_header_above(c(" " = 1, "DV: Black candidate selected" = 9)) %>%
  footnote(
    "This table presents the results of ordinary least squares regression models weighted for representativeness on age, gender, and region. Perceptions of anti-Black discrimination, racial resentment, presidential feeling thermometers, self-monitoring, education, and household income are scaled from 0 to 1. Age is in years. The omitted categories are midwestern men. Data for Columns (1)-(4) are from Lucid Study 1. Data for Columns (5)-(9) are from Lucid Study 2.",
    threeparttable = TRUE
  ) %>%
  kable_styling(font_size = 9)

save_kable(tabc2, "results/tabc2.tex", float = FALSE)

# TABLE C3 ----

# Print missing cells as empty strings.
options(knitr.kable.NA = "")

# Pre-computed reparations results; the X column is not part of the table.
repstab <- read.csv("data/clean/reparationstab.csv")
repstab <- dplyr::select(repstab, -X)

# Landscape LaTeX table split into two four-row panels by respondent stance.
tabc3 <- landscape(
  kable(
    repstab,
    format = "latex",
    booktabs = TRUE,
    full_width = TRUE,
    linesep = "\\addlinespace",
    escape = FALSE,
    align = "lccccc",
    caption = "Support for candidates based on race and stances on reparations",
    col.names = c("Candidate stances (this candidate/opponent)",
                  "Rate of support for Black candidates",
                  "Weighted N",
                  "Rate of support for white candidates",
                  "Weighted N",
                  "Black minus white difference")
  )
) %>%
  # Fixed widths: wide text columns, narrow N columns.
  column_spec(1, width = "4cm") %>%
  column_spec(2, width = "3.2cm") %>%
  column_spec(c(3, 5), width = "0.8cm") %>%
  column_spec(4, width = "3.2cm") %>%
  column_spec(6, width = "3cm") %>%
  kable_styling(font_size = 9, latex_options = "HOLD_position") %>%
  # escape = FALSE keeps the $...$ math in the note intact.
  footnote(
    general = "$*p<0.05$; $**p<0.01$; $***p<0.001$. P-values for rates of support indicate significance of difference from 0.5. Data are from Lucid Studies 1 and 2.",
    threeparttable = TRUE,
    escape = FALSE
  ) %>%
  pack_rows("Respondent supports reparations or does not know", 1, 4,
            latex_gap_space = "1em", indent = FALSE) %>%
  pack_rows("Respondent opposes reparations", 5, 8,
            latex_gap_space = "1em", indent = FALSE)

save_kable(tabc3, "results/tabc3.tex", float = FALSE)


# TABLE C4 ----

# Profile-level data for the ideological-congruence analysis. `race` is stored
# as an ordered factor with White first.
dat_ideo <- read.csv("data/clean/dat_ideo.csv", stringsAsFactors = TRUE) %>%
  mutate(race = factor(race, levels = c("White", "Black"), ordered = TRUE))

# Weighted linear model with an ideological-distance x race interaction.
# race is converted back to an unordered factor inside the formula so that the
# model uses treatment contrasts with White as the baseline. The literal
# `ordered = F` must stay as written: it is part of the coefficient names that
# coef_map matches below.
# NOTE(review): standard errors are clustered on `X`. Other inputs in this
# script drop a column named X as a non-data column; confirm that X here
# identifies participants, as the table note states.
mod1 <- lfe::felm(chosen_candidate ~ abs_ideo_distance * factor(race, ordered = F) | 0 | 0 | X, weights = dat_ideo$weight, dat_ideo)
tabc4 <- landscape(modelsummary(mod1,
                       stars = TRUE,
                       escape = FALSE,
                       gof_map = c("nobs", "r.squared"),
                       title = "Effect of ideological incongruence and race on proportion selecting candidate",
                       coef_map = c("abs_ideo_distance" = "Absolute ideological distance",
                                    "factor(race, ordered = F)Black" = "Black candidate",
                                    "abs_ideo_distance:factor(race, ordered = F)Black" = "Absolute ideological distance $\\times$ Black candidate",
                                    "(Intercept)" = "Intercept"),
                       output = "kableExtra",
                       format = "latex")) %>%
  kable_styling(font_size = 9, latex_options = "HOLD_position") %>%
  add_header_above(c(" " = 1, "DV: Proportion selecting candidate profile" = 1)) %>%
  # Fix: the note read "distance scaled is from 0"; the word order is corrected.
  footnote("Absolute ideological distance is scaled from 0 (perfect congruence) to 1 (maximum incongruence). Standard errors are clustered at participant level. Data are weighted for representativeness by age, gender, and region. Data are from Lucid study 2 and the California voter survey.", threeparttable = TRUE)

save_kable(tabc4, "results/tabc4.tex", float = FALSE)


# TABLE C5 ----

# Pre-computed support rates by relative ideological congruence; the X column
# is not part of the table.
ideotab <- read.csv("data/clean/ideotab.csv")
ideotab <- dplyr::select(ideotab, -X)

# LaTeX table in three panels: two blocks of nine rows, then the remainder.
tabc5 <- kable(
  ideotab,
  format = "latex",
  booktabs = TRUE,
  full_width = TRUE,
  linesep = "\\addlinespace",
  escape = FALSE,
  align = "lccccc",
  caption = "Rates of support for Black and white candidates in conjoint task by ideological congruence with the participant relative to their opponent.",
  col.names = c("Relative congruence",
                "Rate of support for Black candidates",
                "Weight -ed N",
                "Rate of support for white candidates",
                "Weight -ed N",
                "Black - white difference")
) %>%
  column_spec(1, width = "1.8cm") %>%
  column_spec(c(2, 4, 6), width = "3.5cm") %>%
  column_spec(c(3, 5), width = "0.5cm") %>%
  pack_rows("Results presented in paper", 1, 9) %>%
  pack_rows("Results including \"conservative\" and \"very conservative\" participants", 10, 18) %>%
  pack_rows("Results using perceived congruence", 19, nrow(ideotab)) %>%
  kable_styling(font_size = 9, latex_options = "HOLD_position") %>%
  # escape = FALSE keeps the $...$ math in the note intact.
  footnote(
    general = "$*p<0.05$; $**p<0.01$; $***p<0.001$. P-values for rates of support indicate significance of difference from 0.5. Congruence is scaled from -1 (opponent is perfectly congruent and candidate is as far as possible from the participant) to 1 (candidate is perfectly congruent and opponent is as far as possible from the participant). Data are weighted for representativeness by age, gender, and region. Data for the first two sets of results are from Lucid Study 2 and the California voter survey; data for the third set (perceived ideological congruence) are from Lucid Study 2 only.",
    threeparttable = TRUE,
    escape = FALSE
  )

save_kable(tabc5, "results/tabc5.tex", float = FALSE)


# TABLE D1 ----
# Weighting targets that are joined onto the survey data below; the X column
# is dropped.
targets <- read.csv("data/raw/genderageregionweights.csv")
targets <- dplyr::select(targets, -X)

# Study 1 rows for Black candidate profiles, with post-stratification weights.
lucid1 <- read.csv("data/raw/study1.csv") %>%
  filter(race == "Black") %>%
  mutate(
    chosen_candidate = as.numeric(won),
    # Map age onto the 18-83 range before cutting it into bands.
    age = rescale(age, to = c(18, 83)),
    age_group = case_when(
      age < 30 ~ "18-29",
      age < 40 ~ "30-39",
      age < 50 ~ "40-49",
      age < 60 ~ "50-59",
      age < 100 ~ "60p"
    ),
    region = tolower(region),
    gender = case_when(female == 0 ~ 1, female == 1 ~ 2),
    # Columns describing the weighting cell of each row.
    weightgroup = "region x ag x gender",
    weightvar = paste(region, age_group, gender, sep = " x "),
    group = "2022 x White x Democrat"
  ) %>%
  # Natural join: the key is whatever columns the two tables share.
  left_join(targets) %>%
  # n = number of rows in the whole weighting group.
  group_by(group, weightgroup) %>%
  mutate(n = n()) %>%
  # Within a cell, n() is the cell size, so the raw weight is the cell target
  # times the group size divided by the cell size.
  group_by(weightgroup, group, weightvar) %>%
  mutate(
    weight = grouptarget * n / n(),
    subgroup_n = n(),
    # Weights are trimmed to the interval [0.1, 5]; cells with no weight get 1.
    # https://zacharylhertz.github.io/posts/2022/05/weighting-surveys
    weight = case_when(
      weight > 5 ~ 5,
      weight < 0.1 ~ 0.1,
      is.na(weight) ~ 1,
      TRUE ~ weight
    )
  ) %>%
  # Rescale so the weights average to one within the group.
  group_by(group, weightgroup) %>%
  mutate(weight = weight / mean(weight, na.rm = TRUE)) %>%
  ungroup()

# Two weighted bivariate regressions on the self-monitoring index.
tabd1 <- modelsummary(
  list(
    "DV: Voted for Black profile" =
      lm(chosen_candidate ~ selfmon_index, data = lucid1, weights = weight),
    "DV: Important to vote for POC" =
      lm(votecons_poc ~ selfmon_index, data = lucid1, weights = weight)
  ),
  stars = TRUE,
  escape = FALSE,
  gof_map = c("nobs", "r.squared"),
  title = "Self-monitoring and voting for candidates of color among white Democratic study participants.",
  coef_map = list(
    "selfmon_index" = "Self-monitoring index",
    "(Intercept)" = "Intercept"
  ),
  output = "kableExtra",
  format = "latex"
) %>%
  footnote(
    "This table presents the results of linear regression models. The dependent variable in the first column is coded 0 if the participant selected a white candidate profile and 1 if the participant selected a Black candidate profile in a conjoint task. The dependent variable in the second column is an item asking how important it is that a candidate is a person of color. The explanatory variable is self-monitoring, which is coded from 0 (low) to 1 (high). Data are weighted for demographic representativeness by gender, region, and age group. Data are from Lucid Study 1.",
    threeparttable = TRUE
  )

save_kable(tabd1, "results/tabd1.tex", float = FALSE)

# TABLE D2 ----

# Table D2 uses raw data from conjoints included in the meta-analysis.

# TABLE D3 ----

# The three data sets in this section went through the same preparation and
# weighting steps, written out three times. They are factored into the helpers
# below; each pipeline applies the same operations in the same order as before.

# Cut age (in years) into the bands used to build the weighting cells.
age_band <- function(age) {
  case_when(age < 30 ~ "18-29",
            age < 40 ~ "30-39",
            age < 50 ~ "40-49",
            age < 60 ~ "50-59",
            age < 100 ~ "60p")
}

# Study 1 recodes: numeric outcome, age mapped onto 18-83 and banded,
# lower-case region, and gender coded 1/2 from the `female` indicator.
# rescale() works from the range of the rows it is given, so any row filter
# must be applied before calling this function.
prep_study1 <- function(df) {
  df %>%
    mutate(chosen_candidate = as.numeric(won),
           age = rescale(age, to = c(18, 83)),
           age_group = age_band(age),
           region = tolower(region),
           gender = case_when(female == 0 ~ 1, female == 1 ~ 2))
}

# Join the weighting targets and add a `weight` column: cell target times
# group size divided by cell size, trimmed to [0.1, 5] (1 when no weight is
# available), then rescaled to average one within the group.
# https://zacharylhertz.github.io/posts/2022/05/weighting-surveys
add_survey_weights <- function(df, targets) {
  df %>%
    mutate(weightgroup = "region x ag x gender",
           weightvar = paste(region, age_group, gender, sep = " x "),
           group = "2022 x White x Democrat") %>%
    left_join(targets) %>%
    group_by(group, weightgroup) %>%
    mutate(n = n()) %>%
    ungroup() %>%
    group_by(weightgroup, group, weightvar) %>%
    mutate(weight = grouptarget * n / n(),
           subgroup_n = n(),
           weight = case_when(weight > 5 ~ 5,
                              weight < 0.1 ~ 0.1,
                              is.na(weight) ~ 1,
                              TRUE ~ weight)) %>%
    ungroup() %>%
    group_by(group, weightgroup) %>%
    mutate(weight = weight / mean(weight, na.rm = TRUE)) %>%
    ungroup()
}

# Study 1, all candidate profiles, reduced to the columns used for the
# candidate-rating models.
lucid1 <- read.csv("data/raw/study1.csv") %>%
  prep_study1() %>%
  add_survey_weights(targets) %>%
  dplyr::select(c(chosen_candidate, race, weight, goodchance, swingvotes, demvotes))

# Study 2, all candidate profiles, reduced to the same columns.
lucid2 <- read_csvy("data/raw/study2.csv", stringsAsFactors = TRUE) %>%
  mutate(age = rescale(age, to = c(18, 97)),
         age_group = age_band(age),
         region = case_when(region != "" ~ tolower(region), TRUE ~ NA_character_),
         gender = case_when(female == 0 ~ 1, female == 1 ~ 2)) %>%
  # NOTE(review): this pivot turns every row into two (one per thermometer),
  # and `outcome`/`value` are dropped again by the select() below. Each
  # Study 2 profile therefore enters `ratings` twice, which doubles its
  # contribution to N in models (3)-(5). Behaviour is left unchanged here;
  # confirm whether the pivot is intended in this section.
  pivot_longer(c(therm_trump, therm_biden), names_to = "outcome", values_to = "value") %>%
  mutate(outcome = case_when(grepl("trump", outcome) ~ "Trump feeling thermometer",
                             TRUE ~ "Biden feeling thermometer"),
         value = round(value, 1)) %>%
  add_survey_weights(targets) %>%
  dplyr::select(c(chosen_candidate, race, weight, goodchance, swingvotes, demvotes))

# Pooled rating data. Fix: race is now an UNORDERED factor with White as the
# first level. As an ordered factor, lm() applied polynomial contrasts: the
# reported `race.L` coefficient was the Black-white difference divided by
# sqrt(2) and the intercept was not the white mean, contradicting the table
# note "White is the reference category". With treatment contrasts the
# `raceBlack` coefficient is the Black-white difference itself.
ratings <- rbind(lucid1, lucid2) %>%
  mutate(race = factor(race, levels = c("White", "Black")))

# Study 1 again, this time Black profiles only and with all columns kept,
# for the partisanship models.
lucid1 <- read.csv("data/raw/study1.csv") %>%
  filter(race == "Black") %>%
  prep_study1() %>%
  add_survey_weights(targets)

mod1 <- lm(chosen_candidate ~ factor(strength_dem), weights = weight, lucid1)
mod2 <- lm(chosen_candidate ~ factor(strength_dem) + rr_index, weights = weight, lucid1)
mod3 <- lm(goodchance ~ race, weights = weight, ratings)
mod4 <- lm(swingvotes ~ race, weights = weight, ratings)
mod5 <- lm(demvotes ~ race, weights = weight, ratings)

tabd3 <- modelsummary(list(mod1, mod2, mod3, mod4, mod5),
             stars = TRUE,
             escape = FALSE,
             gof_map = c("nobs", "r.squared"),
             title = "Black candidate profiles and partisan considerations.",
             coef_map = list("factor(strength_dem)0" = "Lean Democrat",
                             "factor(strength_dem)0.5" = "Moderate Democrat",
                             "factor(strength_dem)1" = "Strong Democrat",
                             "rr_index" = "Racial resentment",
                             # Matches the treatment-contrast coefficient name.
                             "raceBlack" = "Black candidate",
                             "(Intercept)" = "Intercept"),
             output = "kableExtra",
             format = "latex") %>%
  add_header_above(c(" " = 3, "has a good chance\nof winning the\ngeneral election" = 1,
                     "can appeal\nto swing\nvoters" = 1,
                     "can appeal\nto Democratic\nbase" = 1)) %>%
  add_header_above(c(" " = 1, "DV: Black candidate selected" = 2, "DV: This candidate..." = 3)) %>%
  column_spec(3:5, width = "2cm") %>%
  kable_styling(font_size = 9) %>%
  footnote("This table presents the results of linear regression models. In Columns (1)-(2), the dependent variable is selecting a Black candidate profile and the explanatory variable is the strength of participants' Democratic partisanship. Data are from Lucid Study 1. In Columns (3)-(5), the dependent variable is ratings of the candidate and the explanatory variable is the race of the candidate. White is the reference category. Data are from Lucid Studies 1 and 2. Data in all models are weighted for demographic representativeness by gender, region, and age group.", threeparttable = TRUE)

save_kable(tabd3, "results/tabd3.tex", float = FALSE)


# TABLE D4 ----
# Item table for the white identity consciousness, identity valence, and shame
# batteries, with Cronbach's alpha for each scale shown in the group headers.

# Table body read from disk; it is rendered below with three columns
# (an unlabelled first column, "Mean", and "Standard deviation").
iditems <- readxl::read_xlsx("data/clean/id_shame_items_tab.xlsx")

# Study 2 data with post-stratification weights attached. Only the scale items
# are used in this section.
lucid2 <- read_csvy("data/raw/study2.csv", stringsAsFactors = TRUE) %>%
  filter(race == "Black") %>%
  # Map the observed age range linearly onto 18-97 before binning
  # (presumably age is stored on a rescaled range in the raw file -- confirm).
  mutate(age = rescale(age, to = c(18, 97)),
         age_group = case_when(age < 30 ~ "18-29",
                               age < 40 ~ "30-39",
                               age < 50 ~ "40-49",
                               age < 60 ~ "50-59",
                               age < 100 ~ "60p"),
         region = case_when(region != "" ~ tolower(region),
                            TRUE ~ NA_character_),
         gender = case_when(female == 0 ~ 1, female == 1 ~ 2)) %>%
  # Long format: one row per thermometer, so every input row appears twice.
  pivot_longer(c(therm_trump, therm_biden),
               names_to = "outcome", values_to = "value") %>%
  mutate(outcome = case_when(grepl("trump", outcome) ~ "Trump feeling thermometer",
                             TRUE ~ "Biden feeling thermometer"),
         value = round(value, 1)) %>%
  # Keys used to look up the weighting target for each
  # region x age group x gender cell.
  mutate(weightgroup = "region x ag x gender",
         weightvar = paste(region, age_group, gender, sep = " x "),
         group = "2022 x White x Democrat") %>%
  # `targets` is defined elsewhere in the script; joined on all shared columns.
  left_join(targets) %>%
  group_by(group, weightgroup) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  group_by(weightweightgroup_placeholder = NULL, weightgroup, group, weightvar) %>%
  # Cell weight = target for the cell * (sample size / cell size).
  mutate(weight = grouptarget * n / n(),
         subgroup_n = n(),
         weight = case_when(weight > 5 ~ 5,
                            weight < 0.1 ~ 0.1,
                            is.na(weight) ~ 1, # no target matched: neutral weight
                            TRUE ~ weight) # weights are trimmed to 5 #https://zacharylhertz.github.io/posts/2022/05/weighting-surveys
  ) %>%
  ungroup() %>%
  group_by(group, weightgroup) %>%
  # Normalise so the weights average to 1.
  mutate(weight = weight / mean(weight, na.rm = TRUE)) %>%
  ungroup()

# Cronbach's alpha per scale. cronbach.alpha() is called with its default
# na.rm = FALSE, so rows with a missing item are dropped first; this is now
# done for the consciousness items too, matching the other three scales.
# Only the point estimate ($alpha) is kept.
whiteiditems <- lucid2 %>%
  dplyr::select(consc1, consc2, consc3, consc4, consc5) %>%
  na.omit()
idalpha <- cronbach.alpha(whiteiditems, CI = TRUE)$alpha

whitevalitems <- lucid2 %>%
  dplyr::select(val1_1, val1_2, val1_3, val1_4, val2_1, val2_2, val2_3, val2_4) %>%
  na.omit()
valalpha <- cronbach.alpha(whitevalitems, CI = TRUE)$alpha

moralshameitems <- lucid2 %>%
  dplyr::select(c(shame_moral_1, shame_moral_2, shame_moral_3, shame_moral_4)) %>%
  na.omit()
moralpha <- cronbach.alpha(moralshameitems, CI = TRUE)$alpha

imageshameitems <- lucid2 %>%
  dplyr::select(c(shame_image_1, shame_image_2, shame_image_3)) %>%
  na.omit()
imalpha <- cronbach.alpha(imageshameitems, CI = TRUE)$alpha

# Group header of the form "<scale name> ($\alpha = $ 0.xx)". The backslashes
# are deliberately doubled again because the label is passed to pack_rows()
# with escape = FALSE.
alpha_header <- function(label, alpha) {
  paste0(label, " ($\\\\alpha = $ ", round(alpha, 2), ")")
}

# Print missing cells as blanks instead of "NA".
options(knitr.kable.NA = '')
# NOTE(review): full_width is documented as a kable_styling() argument;
# confirm whether passing it to kable() has any effect.
tabd4 <- kable(iditems, booktabs = TRUE, caption = "White identity consciousness, identity valence, and shame items", digits = 3, escape = FALSE, full_width = TRUE,
      col.names = c("", "Mean", "Standard deviation"), format = "latex") %>%
  kable_styling(font_size = 9, latex_options = "HOLD_position") %>%
  column_spec(1, width = "10cm") %>%
  column_spec(2, width = "2cm") %>%
  column_spec(3, width = "2cm") %>%
  # Rows 1-5: consciousness items.
  pack_rows(alpha_header("White identity consciousness", idalpha), 1, 5, latex_gap_space = "1em", indent = FALSE, escape = FALSE) %>%
  # Rows 6-13: valence items, split into two parts with their own prompts.
  pack_rows(alpha_header("White identity valence", valalpha),
            6, 13, latex_gap_space = "1em", indent = FALSE, escape = FALSE) %>%
  pack_rows("Part I:\n\"Please indicate the extent to which you think being white has affected your life in the\nfollowing areas, from making things much harder (0) to making things much easier (1):\"", 6, 9, latex_gap_space = "1em", indent = FALSE) %>%
  pack_rows("Part II:\n\"Please tell us how strongly you agree (1) or disagree (0) with the following statements:\"", 10, 13, latex_gap_space = "1em", indent = FALSE) %>%
  # Rows 14-17: moral shame items.
  pack_rows(alpha_header("White moral shame", moralpha), 14, 17, latex_gap_space = "1em", indent = FALSE, escape = FALSE) %>%
  pack_rows("\"Please tell us how strongly you agree (1) or disagree (0) with the following statements:\"", 14, 17, latex_gap_space = "0em", indent = FALSE) %>%
  # Rows 18-20: image shame items.
  pack_rows(alpha_header("White image shame", imalpha), 18, 20, latex_gap_space = "1em", indent = FALSE, escape = FALSE) %>%
  pack_rows("\"Please tell us how strongly you agree (1) or disagree (0) with the following statements:\"", 18, 20, latex_gap_space = "0em", indent = FALSE, escape = FALSE) %>%
  footnote(general = "Data are from Lucid Study 2.", threeparttable = TRUE)

save_kable(tabd4, "results/tabd4.tex", float = FALSE)


# FIGURE D1 ----
# Linear fit of selecting the Black candidate profile on white identity
# consciousness, drawn separately for positive and negative identity valence.

# Study 2 data with post-stratification weights, plus a binary valence label.
lucid2 <- read_csvy("data/raw/study2.csv", stringsAsFactors = TRUE) %>%
  filter(race == "Black") %>%
  # Map the observed age range linearly onto 18-97 before binning
  # (presumably age is stored on a rescaled range in the raw file -- confirm).
  mutate(age = rescale(age, to = c(18, 97)),
         age_group = case_when(age < 30 ~ "18-29",
                               age < 40 ~ "30-39",
                               age < 50 ~ "40-49",
                               age < 60 ~ "50-59",
                               age < 100 ~ "60p"),
         region = case_when(region != "" ~ tolower(region),
                            TRUE ~ NA_character_),
         gender = case_when(female == 0 ~ 1, female == 1 ~ 2)) %>%
  # Long format: one row per thermometer, so every input row appears twice.
  pivot_longer(c(therm_trump, therm_biden),
               names_to = "outcome", values_to = "value") %>%
  mutate(outcome = case_when(grepl("trump", outcome) ~ "Trump feeling thermometer",
                             TRUE ~ "Biden feeling thermometer"),
         value = round(value, 1)) %>%
  # Keys used to look up the weighting target for each
  # region x age group x gender cell.
  mutate(weightgroup = "region x ag x gender",
         weightvar = paste(region, age_group, gender, sep = " x "),
         group = "2022 x White x Democrat") %>%
  # `targets` is defined elsewhere in the script; joined on all shared columns.
  left_join(targets) %>%
  group_by(group, weightgroup) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  group_by(weightgroup, group, weightvar) %>%
  # Cell weight = target for the cell * (sample size / cell size).
  mutate(weight = grouptarget * n / n(),
         subgroup_n = n(),
         weight = case_when(weight > 5 ~ 5,
                            weight < 0.1 ~ 0.1,
                            is.na(weight) ~ 1, # no target matched: neutral weight
                            TRUE ~ weight) # weights are trimmed to 5 #https://zacharylhertz.github.io/posts/2022/05/weighting-surveys
  ) %>%
  ungroup() %>%
  group_by(group, weightgroup) %>%
  # Normalise so the weights average to 1.
  mutate(weight = weight / mean(weight, na.rm = TRUE)) %>%
  ungroup() %>%
  # Split the valence index at its midpoint; a missing index stays NA.
  mutate(`White identity valence` = case_when(val_index > 0.5 ~ "Positive",
                                              val_index <= 0.5 ~ "Negative"))

# The pivot above stores every observation twice (once per thermometer), and
# neither `outcome` nor `value` is used in the figure. Plot a single copy so
# geom_smooth() does not fit on duplicated rows, which would double the
# apparent sample size and draw the confidence band too narrow. `lucid2`
# itself is left in long format.
figd1_data <- lucid2 %>%
  filter(outcome == "Biden feeling thermometer")

figd1 <- ggplot(figd1_data, aes(x = consc_index, y = chosen_candidate, color = `White identity valence`)) +
  # Reference line at 0.5 on the outcome scale.
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "darkgrey") +
  geom_smooth(method = "lm") +
  theme_bw() +
  scale_color_manual(values = c("darkgrey", "black")) +
  xlab("White identity consciousness") +
  ylab("Estimated proportion selecting Black candidate") +
  theme(text = element_text(family = "serif"),
        panel.grid = element_blank())

# Pass the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(filename = "figd1.pdf", plot = figd1, path = "results", units = "in", width = 6.5, dpi = 700)


# TABLE D5 ----
# Weighted regressions of selecting the Black candidate profile on white
# identity measures (lucid2) and racial group feeling thermometers (therms).

# California omnibus data with post-stratification weights; every row is
# assigned to the "west" region for the weighting cells.
therms <- read.csv("data/raw/ca_omnibus.csv") %>%
  mutate(region = "west",
         age_group = case_when(age < 30 ~ "18-29",
                               age < 40 ~ "30-39",
                               age < 50 ~ "40-49",
                               age < 60 ~ "50-59",
                               age < 150 ~ "60p"),
         gender = case_when(gender == "Male" ~ 1, gender == "Female" ~ 2)) %>%
  # Keys used to look up the weighting target for each
  # region x age group x gender cell.
  mutate(weightgroup = "region x ag x gender",
         weightvar = paste(region, age_group, gender, sep = " x "),
         group = "2022 x White x Democrat") %>%
  # `targets` is defined elsewhere in the script; joined on all shared columns.
  left_join(targets) %>%
  group_by(group, weightgroup) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  group_by(weightgroup, group, weightvar) %>%
  # Cell weight = target for the cell * (sample size / cell size).
  mutate(weight = grouptarget * n / n(),
         subgroup_n = n(),
         weight = case_when(weight > 5 ~ 5,
                            weight < 0.1 ~ 0.1,
                            is.na(weight) ~ 1, # no target matched: neutral weight
                            TRUE ~ weight) # weights are trimmed to 5 #https://zacharylhertz.github.io/posts/2022/05/weighting-surveys
  ) %>%
  ungroup() %>%
  group_by(group, weightgroup) %>%
  # Normalise so the weights average to 1.
  mutate(weight = weight / mean(weight, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(thermdiff = therm_blacks - therm_whites) %>%
  # Weights are computed on all rows; only the Black-candidate rows are modelled.
  filter(candrace == "Black")

# `lucid2` is in long format with one row per feeling thermometer, so every
# observation appears twice. None of the models below use the thermometer
# columns; fit them on a single copy so the reported N and standard errors are
# not based on duplicated rows.
lucid2_models <- lucid2 %>%
  filter(outcome == "Biden feeling thermometer")

tabd5 <- modelsummary(
  list(
    # Columns (1)-(6): white identity measures.
    lm(chosen_candidate ~ consc_index, weights = weight, data = lucid2_models),
    lm(chosen_candidate ~ val_index, weights = weight, data = lucid2_models),
    lm(chosen_candidate ~ consc_index * val_index, weights = weight, data = lucid2_models),
    lm(chosen_candidate ~ shame_moral_index, weights = weight, data = lucid2_models),
    lm(chosen_candidate ~ shame_image_index, weights = weight, data = lucid2_models),
    # Weighted like every other column, as the table footnote states.
    lm(chosen_candidate ~ consc_index + val_index + shame_moral_index + shame_image_index,
       weights = weight, data = lucid2_models),
    # Columns (7)-(9): racial group feeling thermometers.
    lm(chosen_candidate ~ therm_blacks, weights = weight, data = therms),
    lm(chosen_candidate ~ therm_whites, weights = weight, data = therms),
    lm(chosen_candidate ~ thermdiff, weights = weight, data = therms)
  ),
  stars = TRUE,
  escape = FALSE,
  gof_map = c("nobs", "r.squared"),
  title = "White identity, racial group feeling thermometers, and support for Black candidate profiles.",
  # Display labels; coefficients appear in this order.
  coef_map = list("consc_index" = "White ID consciousness",
                  "val_index" = "White ID valence",
                  "consc_index:val_index" = "Consciousness x valence",
                  "shame_moral_index" = "White moral shame",
                  "shame_image_index" = "White image shame",
                  "therm_blacks" = "Black FT",
                  "therm_whites" = "White FT",
                  "thermdiff" = "Black - white FT",
                  "(Intercept)" = "Intercept"),
  output = "kableExtra",
  format = "latex"
) %>%
  # One spanning header over all nine model columns.
  add_header_above(c(" " = 1, "DV: Black candidate selected" = 9)) %>%
  kable_styling(font_size = 8) %>%
  footnote("This table presents the results of linear regression models. The dependent variable is selecting a Black candidate profile. The explanatory variables, white ID consciousness, white ID valence, white moral and image shame, Black and white group feeling thermometers, and the difference between the two thermometer ratings, are all scaled from 0 (low/negative) to 1 (high/positive). Data in Columns (1)-(6) are from Lucid Study 2 and data in Columns (7)-(9) are from the California voter study. Data in all models are weighted for demographic representativeness by gender, region, and age group.", threeparttable = TRUE)

save_kable(tabd5, "results/tabd5.tex", float = FALSE)
